results_dir <- "/albona/nobackup2/yingxinl/melanoma_github/results/final"
figures_dir <- "/albona/nobackup2/yingxinl/melanoma_github/figures/final"
sourcedata_dir <- "/albona/nobackup2/yingxinl/melanoma_github/source_data/final"
dir.create(results_dir, recursive = TRUE)
dir.create(figures_dir, recursive = TRUE)
dir.create(sourcedata_dir, recursive = TRUE)

1 Package

library(SingleCellExperiment)
library(dplyr)
library(ggplot2)
library(ggthemes)
library(ggpubr)
library(moon) # Yingxin's personal package
library(pheatmap)
library(reshape2)
library(gridExtra)
library(RColorBrewer)
library(UpSetR)
library(scattermore)
library(scater)
library(scran)
library(ggridges)
library(rcartocolor)
library(Rtsne)
library(ggalluvial)
library(ggrepel)
library(BiocParallel)
library(BiocSingular)
library(BiocNeighbors)
library(openxlsx)
ggplot2::theme_set(theme_bw() + theme_yx() + 
                     theme(axis.text.y = element_text(color = "black"),
                           axis.text.x = element_text(color = "black"),
                           axis.ticks.x = element_line(color = "black"),
                           axis.ticks.y = element_line(color = "black")))

2 Data

sce_melanoma <- readRDS(file.path(sourcedata_dir, "sce_melanoma_raw.rds"))

3 Quality control

nUMI <- colSums(counts(sce_melanoma))
summary(nUMI)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     223    2251    9524   13574   20104  188737
nGene <- colSums(counts(sce_melanoma) != 0)
summary(nGene)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      57     981    3019    3052    4747   10662
mt_genes <- grep("MT-", rowData(sce_melanoma)[, "Symbol"])
MT_prop <- colSums(counts(sce_melanoma)[mt_genes, ])/nUMI

sce_melanoma$nUMI <- nUMI
sce_melanoma$nGene <- nGene
sce_melanoma$MT_prop <- MT_prop
df_toPlot <- moon::makeMoonDF(sce_melanoma)
g1 <- ggplot(df_toPlot, aes(x = nUMI)) +
  geom_histogram(bins = 50) +
  scale_color_viridis_d() +
  theme(aspect.ratio = 0.6) +
  scale_x_log10() +
  geom_vline(xintercept = 500, color = "red")+
  geom_vline(xintercept = 80000, color = "red")


g2 <- ggplot(df_toPlot, aes(x = nGene)) +
  geom_histogram(bins = 50) +
  scale_color_viridis_d() +
  theme(aspect.ratio = 0.6) +
  scale_x_log10() +
  geom_vline(xintercept = 400, color = "red")


g3 <- ggplot(df_toPlot, aes(x = MT_prop)) +
  geom_histogram(bins = 50) +
  scale_color_viridis_d() +
  theme(aspect.ratio = 0.6) +
  geom_vline(xintercept = 0.2, color = "red") +
  xlab("MT%")
ggarrange(g1, g2, g3, ncol = 1, nrow = 3, align = "hv")

ggsave(file.path(figures_dir, "qc.pdf"), width = 6, height = 10)
saveRDS(df_toPlot[, c("nUMI", "nGene", "MT_prop")], file = file.path(sourcedata_dir, "qc.rds"))
keep <- nUMI < 80000 & nUMI > 500 & nGene > 400 & MT_prop < 0.2
table(keep)
## keep
##  FALSE   TRUE 
##  24233 168599
table(keep, sce_melanoma$Sample)
##        
## keep    SCC16_0069_B230216_DMSO SCC16_0069_B230216_DT SCC16_0500_B101017_DMSO
##   FALSE                     596                   835                     620
##   TRUE                     7253                  7480                   13036
##        
## keep    SCC16_0500_B101017_DT SCC20_0352_B291020_DMSO SCC20_0352_B291020_DT
##   FALSE                  1131                     703                   632
##   TRUE                   7514                   15321                 11986
##        
## keep    SMU09_0157_B110419_DMSO SMU09_0157_B110419_DT SMU17_0075_B070921_DMSO
##   FALSE                    2630                  2265                    1075
##   TRUE                     7791                  7560                    8286
##        
## keep    SMU17_0075_B070921_DT SMU17_0132_B180418_DMSO SMU17_0132_B180418_DT
##   FALSE                  1192                     752                  1737
##   TRUE                   5964                    6277                  6424
##        
## keep    SMU17_0209_B130617_DMSO SMU17_0209_B130617_DT SMU18_0017_B110518_DMSO
##   FALSE                    1216                  2223                    1412
##   TRUE                     8427                  6129                    7218
##        
## keep    SMU18_0017_B110518_DT SMU18_0283_B090119_DMSO SMU18_0283_B090119_DT
##   FALSE                   990                    1098                   553
##   TRUE                   7483                    9551                  8099
##        
## keep    SMU19_0306_B290620_DMSO SMU19_0306_B290620_DT
##   FALSE                    1469                  1104
##   TRUE                     9119                  7681

Keel cells with nUMI < 80000 & nUMI > 500 & nGene > 400 & MT_prop < 0.2

sce_melanoma <- sce_melanoma[, keep]
sce_melanoma <- sce_melanoma[rowSums(counts(sce_melanoma)) != 0, ]
sce_melanoma
## class: SingleCellExperiment 
## dim: 30986 168599 
## metadata(1): Samples
## assays(1): counts
## rownames(30986): ENSG00000237613 ENSG00000186092 ... ENSG00000277475
##   ENSG00000268674
## rowData names(3): ID Symbol Type
## colnames(168599): AAACCCAAGAAACTGT-1 AAACCCAAGAATCGAT-1 ...
##   TTTGTTGGTTGACGGA-20 TTTGTTGTCGGTGTTA-20
## colData names(15): Sample Barcode ... nGene MT_prop
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):
sce_melanoma <- logNormCounts(sce_melanoma)

4 Cell type classification

refine_prediction <- readRDS("results/scClassify_selfTraining_prediction.rds")
refine_prediction <- refine_prediction[, 2]
label_len <- unlist(lapply(strsplit(refine_prediction, "_"), length))
refine_prediction[label_len > 1] <- "intermediate"
sce_melanoma$scClassify_celltype <- refine_prediction
celltype_color <- RColorBrewer::brewer.pal(12, "Paired")[c(1:10)]
celltype_color <- c(celltype_color, "grey70")

names(celltype_color) <- names(table(refine_prediction))
celltype_color["intermediate"] <- "grey90"
df_toPlot <- moon::makeMoonDF(sce_melanoma)
sce_melanoma$Patient_publish <- paste("Patient", as.numeric(factor(sce_melanoma$Patient)), sep = "#")
sce_melanoma$Sample_publish <- paste(sce_melanoma$Patient_publish,
                                     sce_melanoma$Condition, sep = " ")


meta_data_publish <- colData(sce_melanoma)[, c("Patient_publish",
                                               "Sample_publish",
                                               "Patient",
                                               "Sample",
                                               "Sample.type",
                                               "Condition")]
meta_data_publish <- unique(meta_data_publish)
meta_data_publish <- data.frame(meta_data_publish)
write.csv(meta_data_publish, file = "source_data/final/meta_remap_sample.csv")

5 Exploratory data analysis

rownames(sce_melanoma) <- rowData(sce_melanoma)$Symbol
hvg_fit <- scran::modelGeneVar(sce_melanoma, block = sce_melanoma$Sample)
hvg <- scran::getTopHVGs(hvg_fit, n = 2000)

use_bpparam <- MulticoreParam(workers = 10)
BiocParallel::bpprogressbar(use_bpparam) <- TRUE

filter <- grepl("RPL|RPS|^MT-|ERCC|MTMR|MTND", rownames(sce_melanoma))
gene_corMat <- qlcMatrix::cosSparse(t((logcounts(sce_melanoma)[filter, ])), 
                                    t((logcounts(sce_melanoma)[!filter, ])))
gene_corMat_max <- apply(gene_corMat, 2, max, na.rm = TRUE)
exclude_genes <- c(rownames(sce_melanoma)[filter], 
                   names(gene_corMat_max)[gene_corMat_max > 0.8])
filter <- rownames(sce_melanoma) %in% exclude_genes

hvg <- setdiff(hvg, exclude_genes)
set.seed(2022)
sce_melanoma <- runPCA(sce_melanoma, ncomponents = 20, subset_row = hvg, BPPARAM = use_bpparam, BSPARAM = RandomParam())
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |================                                                      |  22%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |======================================================                |  78%
  |                                                                            
  |==============================================================        |  89%
  |                                                                            
  |======================================================================| 100%
## 
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |================                                                      |  22%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |======================================================                |  78%
  |                                                                            
  |==============================================================        |  89%
  |                                                                            
  |======================================================================| 100%
## 
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |================                                                      |  22%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |======================================================                |  78%
  |                                                                            
  |==============================================================        |  89%
  |                                                                            
  |======================================================================| 100%
## 
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |================                                                      |  22%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |======================================================                |  78%
  |                                                                            
  |==============================================================        |  89%
  |                                                                            
  |======================================================================| 100%
## 
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |================                                                      |  22%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |======================================================                |  78%
  |                                                                            
  |==============================================================        |  89%
  |                                                                            
  |======================================================================| 100%
## 
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |================                                                      |  22%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |======================================================                |  78%
  |                                                                            
  |==============================================================        |  89%
  |                                                                            
  |======================================================================| 100%
set.seed(2022)
sce_melanoma <- runUMAP(sce_melanoma, dimred = "PCA", min_dist = 0.3, verbose = TRUE)

5.1 Cell type proportion

df_toPlot <- moon::makeMoonDF(sce_melanoma)
ggplot(df_toPlot, aes(x = UMAP1, y = UMAP2, color = scClassify_celltype)) +
  geom_scattermore() +
  scale_color_manual(values = celltype_color) +
  theme(aspect.ratio = 1) +
  labs(color = "")

ggsave(file.path(figures_dir, "umap_celltype.pdf"), width = 8, height = 6)
saveRDS(df_toPlot[, c("UMAP1", "UMAP2", "scClassify_celltype")], 
        file = file.path(sourcedata_dir, "umap_celltype.rds"))


ggplot(df_toPlot[!df_toPlot$scClassify_celltype %in% c("intermediate", "unassigned"), ],
       aes(x = factor(Sample, levels = rev(unique(df_toPlot$Sample2))), 
           fill = scClassify_celltype)) +
  geom_bar(position = "fill") +
  scale_fill_manual(values = celltype_color) +
  theme(aspect.ratio = 0.4) +
  labs(fill = "") +
  coord_flip() +
  facet_grid(factor(Sample.type, levels = rev(names(table(df_toPlot$Sample.type))))~., scale = "free") +
  xlab("") +
  ylab("Cell type proportion")

ggsave(file.path(figures_dir, "celltype_proportion.pdf"), width = 8, height = 4)
saveRDS(df_toPlot[!df_toPlot$scClassify_celltype %in% c("intermediate", "unassigned"), 
                  c("UMAP1", "UMAP2", "scClassify_celltype", "Sample", "Sample.type")], 
        file = file.path(sourcedata_dir, "celltype_proportion.rds"))



ggplot(df_toPlot[!df_toPlot$scClassify_celltype %in% c("intermediate", "unassigned"), ],
       aes(x = factor(Sample_publish, levels = rev(sort(unique(df_toPlot$Sample_publish)))), 
           fill = scClassify_celltype)) +
  geom_bar(position = "fill") +
  scale_fill_manual(values = celltype_color) +
  theme(aspect.ratio = 0.4) +
  labs(fill = "") +
  coord_flip() +
  facet_grid(factor(Sample.type, levels = rev(names(table(df_toPlot$Sample.type))))~., scale = "free") +
  xlab("") +
  ylab("Cell type proportion")

ggsave(file.path(figures_dir, "celltype_proportion_publish.pdf"), width = 8, height = 4)
saveRDS(df_toPlot[!df_toPlot$scClassify_celltype %in% c("intermediate", "unassigned"), 
                  c("UMAP1", "UMAP2", "scClassify_celltype", "Sample_publish", "Sample.type")], 
        file = file.path(sourcedata_dir, "celltype_proportion_publish.rds"))
celltype_prop <- table(df_toPlot[!df_toPlot$scClassify_celltype %in% c("intermediate", "unassigned"), ]$scClassify_celltype,
                       df_toPlot[!df_toPlot$scClassify_celltype %in% c("intermediate", "unassigned"), ]$Sample)
celltype_prop <- apply(celltype_prop, 2, function(x) x/sum(x))
celltype_prop <- melt(celltype_prop)
colnames(celltype_prop) <- c("Celltype", "Sample", "Prop")
sample_meta <- unique(df_toPlot[, c("Sample", "Sample.type", "Condition", "Patient", 
                                    "Sample_publish", "Patient_publish")])
celltype_prop <- merge(celltype_prop, sample_meta, by = "Sample")
ggplot(celltype_prop, aes(x = Condition, y = Prop * 100, color = Sample.type)) +
  geom_line(aes(group = Patient), color = "black") +
  geom_point() +
  facet_wrap(~Celltype, nrow = 1, scale = "free") +
  scale_color_brewer(palette = "Dark2") +
  theme(aspect.ratio = 1, legend.position = "bottom") +
  ylab("Cell type proportion") 

ggsave(file.path(figures_dir, "celltype_proportion_dots.pdf"), width = 13, height = 4)
saveRDS(celltype_prop, 
        file = file.path(sourcedata_dir, "celltype_proportion_dots.rds"))


ggplot(celltype_prop, aes(x = Condition, y = Prop * 100, color = Condition)) +
  geom_line(aes(group = Patient), color = "black") +
  geom_point() +
  facet_grid(Celltype~Sample.type, scale = "free") +
  scale_color_brewer(palette = "Set1") +
  theme(aspect.ratio = 1, legend.position = "bottom") +
  ylab("Cell type proportion") 

ggsave(file.path(figures_dir, "celltype_proportion_dots_v2.pdf"), width = 8, height = 13)
ggplot(celltype_prop, aes(x = Sample.type, y = Prop * 100, color = Condition)) +
  geom_boxplot() +
  facet_wrap(~Celltype, nrow = 1, scale = "free") +
  scale_color_brewer(palette = "Set1") +
  theme(aspect.ratio = 1, legend.position = "bottom") +
  ylab("Cell type proportion") 

ggsave(file.path(figures_dir, "celltype_proportion_boxplot.pdf"), width = 13, height = 4)
lmer_res <- list()
for (i in levels(celltype_prop$Celltype)) {
  df_subset <- celltype_prop[celltype_prop$Celltype == i, ]
  
  lmer_res[[i]] <- lmerTest::lmer(Prop ~ Sample.type * Condition + (1|Patient), data = df_subset)
}


lapply(lmer_res, function(x) round(summary(x)$coefficients, 3))
## $`B Cells`
##                                        Estimate Std. Error    df t value
## (Intercept)                               0.038      0.039 8.052   0.981
## Sample.typeTreatment naïve                0.046      0.055 8.052   0.848
## ConditionDT                               0.002      0.004 8.000   0.398
## Sample.typeTreatment naïve:ConditionDT   -0.006      0.006 8.000  -0.972
##                                        Pr(>|t|)
## (Intercept)                               0.355
## Sample.typeTreatment naïve                0.421
## ConditionDT                               0.701
## Sample.typeTreatment naïve:ConditionDT    0.359
## 
## $`Endothelial Cells`
##                                        Estimate Std. Error    df t value
## (Intercept)                                   0          0 12.62   1.479
## Sample.typeTreatment naïve                    0          0 12.62  -0.461
## ConditionDT                                   0          0  8.00   0.602
## Sample.typeTreatment naïve:ConditionDT        0          0  8.00  -0.152
##                                        Pr(>|t|)
## (Intercept)                               0.164
## Sample.typeTreatment naïve                0.653
## ConditionDT                               0.564
## Sample.typeTreatment naïve:ConditionDT    0.883
## 
## $Fibroblasts
##                                        Estimate Std. Error    df t value
## (Intercept)                               0.035      0.014 10.23   2.545
## Sample.typeTreatment naïve               -0.031      0.019 10.23  -1.604
## ConditionDT                              -0.009      0.010  8.00  -0.949
## Sample.typeTreatment naïve:ConditionDT    0.010      0.014  8.00   0.739
##                                        Pr(>|t|)
## (Intercept)                               0.029
## Sample.typeTreatment naïve                0.139
## ConditionDT                               0.370
## Sample.typeTreatment naïve:ConditionDT    0.481
## 
## $Macrophages
##                                        Estimate Std. Error    df t value
## (Intercept)                               0.046      0.016 9.846   2.935
## Sample.typeTreatment naïve               -0.024      0.022 9.846  -1.084
## ConditionDT                              -0.005      0.010 8.000  -0.454
## Sample.typeTreatment naïve:ConditionDT    0.004      0.014 8.000   0.249
##                                        Pr(>|t|)
## (Intercept)                               0.015
## Sample.typeTreatment naïve                0.304
## ConditionDT                               0.662
## Sample.typeTreatment naïve:ConditionDT    0.809
## 
## $Monocyte
##                                        Estimate Std. Error    df t value
## (Intercept)                               0.022      0.014 8.063   1.587
## Sample.typeTreatment naïve               -0.019      0.020 8.063  -0.957
## ConditionDT                              -0.002      0.002 8.000  -0.989
## Sample.typeTreatment naïve:ConditionDT    0.001      0.002 8.000   0.306
##                                        Pr(>|t|)
## (Intercept)                               0.151
## Sample.typeTreatment naïve                0.366
## ConditionDT                               0.352
## Sample.typeTreatment naïve:ConditionDT    0.767
## 
## $`NK Cells`
##                                        Estimate Std. Error    df t value
## (Intercept)                               0.009      0.003 8.217   2.802
## Sample.typeTreatment naïve               -0.005      0.005 8.217  -1.124
## ConditionDT                               0.002      0.001 8.000   2.681
## Sample.typeTreatment naïve:ConditionDT   -0.001      0.001 8.000  -0.826
##                                        Pr(>|t|)
## (Intercept)                               0.023
## Sample.typeTreatment naïve                0.293
## ConditionDT                               0.028
## Sample.typeTreatment naïve:ConditionDT    0.432
## 
## $`Plasma Cells`
##                                        Estimate Std. Error    df t value
## (Intercept)                               0.001      0.001 9.231   1.207
## Sample.typeTreatment naïve                0.001      0.001 9.231   1.322
## ConditionDT                               0.000      0.000 8.000  -0.257
## Sample.typeTreatment naïve:ConditionDT   -0.001      0.001 8.000  -1.181
##                                        Pr(>|t|)
## (Intercept)                               0.257
## Sample.typeTreatment naïve                0.218
## ConditionDT                               0.804
## Sample.typeTreatment naïve:ConditionDT    0.272
## 
## $`T cells`
##                                        Estimate Std. Error    df t value
## (Intercept)                               0.151      0.058 8.866   2.609
## Sample.typeTreatment naïve                0.041      0.082 8.866   0.507
## ConditionDT                               0.060      0.026 8.000   2.293
## Sample.typeTreatment naïve:ConditionDT   -0.047      0.037 8.000  -1.257
##                                        Pr(>|t|)
## (Intercept)                               0.029
## Sample.typeTreatment naïve                0.625
## ConditionDT                               0.051
## Sample.typeTreatment naïve:ConditionDT    0.244
## 
## $`Tumor cells`
##                                        Estimate Std. Error    df t value
## (Intercept)                               0.698      0.091 8.844   7.641
## Sample.typeTreatment naïve               -0.010      0.129 8.844  -0.078
## ConditionDT                              -0.048      0.041 8.000  -1.183
## Sample.typeTreatment naïve:ConditionDT    0.040      0.058 8.000   0.689
##                                        Pr(>|t|)
## (Intercept)                               0.000
## Sample.typeTreatment naïve                0.940
## ConditionDT                               0.271
## Sample.typeTreatment naïve:ConditionDT    0.510
g3 <- ggplot(df_toPlot, aes(x = UMAP1, y = UMAP2, color = Sample_publish)) +
  geom_scattermore() +
  scale_color_tableau(palette = "Tableau 20") +
  theme(aspect.ratio = 1) +
  labs(color = "")


g1 <- ggplot(df_toPlot, aes(x = UMAP1, y = UMAP2, color = Patient_publish)) +
  geom_scattermore() +
  scale_color_tableau(palette = "Tableau 10") +
  theme(aspect.ratio = 1) +
  labs(color = "")



g2 <- ggplot(df_toPlot, aes(x = UMAP1, y = UMAP2, color = Condition)) +
  geom_scattermore() +
  scale_color_brewer(palette = "Set1") +
  theme(aspect.ratio = 1) +
  labs(color = "")




g4 <- ggplot(df_toPlot, aes(x = UMAP1, y = UMAP2, color = Sample.type)) +
  geom_scattermore() +
  scale_color_brewer(palette = "Dark2") +
  theme(aspect.ratio = 1) +
  labs(color = "")

ggarrange(g3, g1, g2, g4, ncol = 2, nrow = 2, align = "hv")

ggsave(file.path(figures_dir, "umap_supp.pdf"), width = 16, height = 12)
saveRDS(df_toPlot[, c("UMAP1", "UMAP2", "Sample", "Patient", "Condition", "Sample.type")], 
        file = file.path(sourcedata_dir, "umap_supp.rds"))

5.2 Comparison of cell type proportion with the flow cyto

flow_prop <- readxl::read_xls("data/Combined Flow  - Copy.xls")
flow_prop <- flow_prop[-1, ]
flow_prop <- data.frame(flow_prop)
flow_prop <- flow_prop[-11, ]
flow_prop$Sample <- flow_prop[, 1]
flow_prop$Sample[flow_prop$Sample == "SCC16_0069 B23_02_16 M3_DMSO_009.fcs"] <- "SCC16_0069_B230216_DMSO"
flow_prop$Sample[flow_prop$Sample == "SCC16_0069 B23_02_16 M3_DT_010.fcs"] <- "SCC16_0069_B230216_DT"

flow_prop$Sample[flow_prop$Sample == "SCC20_0352 B29_10_20 Left Axillary_DMSO_005.fcs"] <- "SCC20_0352_B291020_DMSO"
flow_prop$Sample[flow_prop$Sample == "SCC20_0352 B29_10_20 Left Axillary_DT_006.fcs"] <- "SCC20_0352_B291020_DT"

flow_prop$Sample[flow_prop$Sample == "SMU09_0157 B11_04_2019 Chest wall_DMSO_023.fcs"] <- "SMU09_0157_B110419_DMSO"
flow_prop$Sample[flow_prop$Sample == "SMU09_0157 B11_04_2019 Chest wall_DT_024.fcs"] <- "SMU09_0157_B110419_DT"


flow_prop$Sample[flow_prop$Sample == "SMU17-0209 B13_06_2017 Small Bowel_DMSO_005.fcs"] <- "SMU17_0209_B130617_DMSO"
flow_prop$Sample[flow_prop$Sample == "SMU17-0209 B13_06_2017 Small Bowel_DT_006.fcs"] <- "SMU17_0209_B130617_DT"

flow_prop$Sample[flow_prop$Sample == "SMU18_0283 B09_01_2019 LI LN_Full DMSO_007.fcs"] <- "SMU18_0283_B090119_DMSO"
flow_prop$Sample[flow_prop$Sample == "SMU18_0283 B09_01_2019 LI LN_Full DT_008.fcs"] <- "SMU18_0283_B090119_DT"

flow_prop$Sample[flow_prop$Sample == "Treatment TD_0500 Control_016.fcs"] <- "SCC16_0500_B101017_DMSO"
flow_prop$Sample[flow_prop$Sample == "Treatment TD_0500 DT_017.fcs"] <- "SCC16_0500_B101017_DT"


flow_prop$Sample[flow_prop$Sample == "Treatment 0306_Full stain Control_016.fcs"] <- "SMU19_0306_B290620_DMSO"
flow_prop$Sample[flow_prop$Sample == "Treatment 0306_Full stain DT_017.fcs"] <- "SMU19_0306_B290620_DT"


flow_prop$Sample[flow_prop$Sample == "Treatment TD_0132 Control_014.fcs"] <- "SMU17_0132_B180418_DMSO"
flow_prop$Sample[flow_prop$Sample == "Treatment TD_0132 DT_015.fcs"] <- "SMU17_0132_B180418_DT"



flow_prop$Sample[flow_prop$Sample == "Treatment TD_0017 Control_016.fcs"] <- "SMU18_0017_B110518_DMSO"
flow_prop$Sample[flow_prop$Sample == "Treatment TD_0017 DT_017.fcs"] <- "SMU18_0017_B110518_DT"


flow_prop$Sample[flow_prop$Sample == "Treatment 0075_Distal Control_014.fcs"] <- "SMU17_0075_B070921_DMSO"
flow_prop$Sample[flow_prop$Sample == "Treatment 0075_Distal DT_015.fcs"] <- "SMU17_0075_B070921_DT"

rownames(flow_prop) <- flow_prop$Sample
t_flow <- flow_prop[, "Total.T.cells...Live.dead"]

t_flow <- as.numeric(t_flow)
names(t_flow) <- rownames(flow_prop)

t_celltype_prop <- celltype_prop[celltype_prop$Celltype == "T cells", ]
plot(t_celltype_prop$Prop, t_flow[as.character(t_celltype_prop$Sample)])

df <- cbind(t_celltype_prop, flow = t_flow[as.character(t_celltype_prop$Sample)])
g_T <- ggplot(df, aes(x = Prop * 100, y = flow)) +
  geom_point(aes(color = Sample_publish), size = 3, alpha = 1) +
  theme(aspect.ratio = 1) +
  xlab("% of T cells in single-cell") +
  ylab("% of T cells in flow cyto") +
  scale_color_tableau(palette = "Tableau 20") +
  geom_text(aes(x = 10, y = 30, label = paste("cor =", round(cor(df$Prop * 100, df$flow), 2)))) +
  labs(color = "")
g_T

cor.test(df$Prop * 100, df$flow)
## 
##  Pearson's product-moment correlation
## 
## data:  df$Prop * 100 and df$flow
## t = 4.5041, df = 18, p-value = 0.0002745
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.4210162 0.8852757
## sample estimates:
##       cor 
## 0.7279195
ggsave(file.path(figures_dir, "T_cells_celltype_proportion_comparison.pdf"), width = 5, height = 4)
saveRDS(df, 
        file = file.path(sourcedata_dir, "T_cells_celltype_proportion.rds"))



tumor_flow <- flow_prop[, "X..SOX10..MLM...Live.Dead"]
tumor_flow <- gsub(" %", "", tumor_flow)
tumor_flow <- as.numeric(tumor_flow)
names(tumor_flow) <- rownames(flow_prop)

tumor_celltype_prop <- celltype_prop[celltype_prop$Celltype == "Tumor cells", ]
df <- cbind(tumor_celltype_prop, flow = tumor_flow[as.character(tumor_celltype_prop$Sample)])
g_tumor <- ggplot(df, aes(x = Prop * 100, y = flow)) +
  geom_point(aes(color = Sample_publish), size = 3, alpha = 1) +
  theme(aspect.ratio = 1) +
  xlab("% of Tumor cells in single-cell") +
  ylab("% of Tumor cells in flow cyto") +
  scale_color_tableau(palette = "Tableau 20") +
  geom_text(aes(x = 50, y = 90, label = paste("cor =", round(cor(df$Prop * 100, df$flow), 2)))) +
  labs(color = "")
g_tumor

cor.test(df$Prop * 100, df$flow)
## 
##  Pearson's product-moment correlation
## 
## data:  df$Prop * 100 and df$flow
## t = 4.1378, df = 18, p-value = 0.000618
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3700097 0.8714695
## sample estimates:
##      cor 
## 0.698208
ggsave(file.path(figures_dir, "Tumor_cells_celltype_proportion_comparison.pdf"), width = 5, height = 4)
saveRDS(df, 
        file = file.path(sourcedata_dir, "Tumor_cells_celltype_proportion.rds"))

legend <- get_legend(
  # create some space to the left of the legend
  g_T + theme(legend.position = "bottom", legend.box.margin = margin(0, 0, 0, 12))
)
library(cowplot)
plot_grid(plot_grid(g_T + theme(legend.position = "none"), 
                    g_tumor + theme(legend.position = "none"), 
                    labels = c('A', 'B'), align = "hv"),
          legend, ncol = 1, rel_heights = c(1, .1))

ggsave(file.path(figures_dir, "celltype_proportion_comparison.pdf"), width = 8, height = 6)

5.3 Marker epxression

g <- lapply(c("CD3E", "MS4A1", "CD68", "CD14",
              "FAP",
              "SOX10", "AXL", "MITF", "NGFR"), 
            function(x) {
              # o <- order(logcounts(sce_melanoma)[x, ])
              ggplot(df_toPlot, aes(x = UMAP1, y = UMAP2, 
                                    color = logcounts(sce_melanoma)[x, ])) +
                geom_scattermore() +
                theme(aspect.ratio = 1) +
                scale_color_gradientn(colors = colorRampPalette(c("grey90", brewer.pal(n = 7, name = "Reds")))(100)) +
                labs(color = "", title = x) 
              
            })
ggarrange(plotlist = g, ncol = 3, nrow = 3, align = "hv")

ggsave(file.path(figures_dir, "celltype_marker_example.pdf"), width = 12, height = 10)
library(cellyx)
subset_idx <- !sce_melanoma$scClassify_celltype %in% c("intermediate", "unassigned", "Tumor cells")
limma_res <- doLimma(logcounts(sce_melanoma)[, subset_idx], sce_melanoma$scClassify_celltype[subset_idx])
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
limma_res <- lapply(limma_res, function(x) x[order(x$logFC, decreasing = TRUE), ])
wb <- createWorkbook()
lapply(seq_along(limma_res), function(i){
  addWorksheet(wb = wb, sheetName = names(limma_res)[i])
  writeData(wb, sheet = names(limma_res)[i], 
            data.frame(limma_res[[i]]))
})
## [[1]]
## [1] 0
## 
## [[2]]
## [1] 0
## 
## [[3]]
## [1] 0
## 
## [[4]]
## [1] 0
## 
## [[5]]
## [1] 0
## 
## [[6]]
## [1] 0
## 
## [[7]]
## [1] 0
## 
## [[8]]
## [1] 0
saveWorkbook(wb, file.path(sourcedata_dir, paste0("immune_celltype_marker", ".xlsx")), overwrite = TRUE)

markers <- lapply(limma_res, function(x) rownames(x)[1:10])

aggExprs <- lapply(names(limma_res), function(x) {
  idx <- sce_melanoma$scClassify_celltype %in% x
  rowMeans(logcounts(sce_melanoma)[unlist(markers), idx])
})
aggExprs <- do.call(cbind, aggExprs)
colnames(aggExprs) <- names(limma_res)
rdbu <- colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(100)
pheatmap(t(aggExprs), cluster_cols = FALSE, cluster_rows = FALSE, 
         scale = "column", color = rdbu, 
         border_color = NA,
         file = file.path(figures_dir, "heatmap_immune_celltype_marker_aggExprs.pdf"),
         height = 3, width = 10)
saveRDS(aggExprs, file.path(sourcedata_dir, paste0("immune_celltype_marker_aggExprs", ".rds")))

6 Numbat results

numbat_classify <- readRDS("/albona/nobackup2/yingxinl/melanoma/results/numbat_tumour_classification.rds")
table(numbat_classify, sce_melanoma$scClassify_celltype)
##                
## numbat_classify B Cells Endothelial Cells Fibroblasts intermediate Macrophages
##          normal    9616                12        1911          971        5028
##          tumor       31                 2         945          830         176
##                
## numbat_classify Monocyte NK Cells Plasma Cells T cells Tumor cells unassigned
##          normal     1549     1084          203   28437        2171        230
##          tumor       139        7           49     166      114848        194
agree_prop <- sum(numbat_classify == "tumor" & sce_melanoma$scClassify_celltype == "Tumor cells")/sum(sce_melanoma$scClassify_celltype == "Tumor cells")
print(agree_prop)
## [1] 0.9814475
g1 <- ggplot(df_toPlot, aes(x = UMAP1, y = UMAP2, color = scClassify_celltype %in% "Tumor cells")) +
  geom_scattermore() +
  scale_color_viridis_d() +
  theme(aspect.ratio = 1) +
  labs(color = "", title = "Tumor in scClassify")


g2 <- ggplot(df_toPlot, aes(x = UMAP1, y = UMAP2, color = numbat_classify %in% "tumor")) +
  geom_scattermore() +
  scale_color_viridis_d() +
  theme(aspect.ratio = 1) +
  labs(color = "", title = "Tumor in Numbat")


g3 <- ggplot(df_toPlot, aes(x = UMAP1, y = UMAP2, 
                            color = numbat_classify %in% "tumor" & 
                              scClassify_celltype %in% "Tumor cells")) +
  geom_scattermore() +
  scale_color_viridis_d() +
  theme(aspect.ratio = 1) +
  labs(color = "", title = "Tumor in both")



ggarrange(g1, g2, g3, ncol = 3, nrow = 1, align = "hv")

ggsave(file.path(figures_dir, "umap_numbat.pdf"), width = 15, height = 6)
df_toPlot$numbat_classify <- numbat_classify
saveRDS(df_toPlot[, c("UMAP1", "UMAP2", "numbat_classify", "scClassify_celltype")], 
        file = file.path(sourcedata_dir, "umap_numbat.rds"))



tab <- table(numbat_classify == "tumor", sce_melanoma$scClassify_celltype == "Tumor cells")
print(tab)
##        
##          FALSE   TRUE
##   FALSE  49041   2171
##   TRUE    2539 114848
tab <- melt(tab)
colnames(tab) <- c("Numbat", "scClassify", "Count")

ggplot(tab, aes(x = Numbat, y = factor(scClassify, levels = c(TRUE, FALSE)), fill = Count)) +
  geom_tile() +
  geom_text(aes(label = Count)) +
  scale_fill_viridis_c() +
  theme_minimal() +
  theme(aspect.ratio = 1) +
  ylab("scClassify")

ggsave(file.path(figures_dir, "confusion_numbat_scClassify.rds.pdf"), width = 4, height = 3)
saveRDS(tab, 
        file = file.path(sourcedata_dir, "confusion_numbat_scClassify.rds"))

mclust::adjustedRandIndex(numbat_classify == "tumor", 
                          sce_melanoma$scClassify_celltype == "Tumor cells")
## [1] 0.888796

7 Save data

saveRDS(sce_melanoma, file = file.path(sourcedata_dir, "sce_melanoma.rds"))

8 Session Info

sessionInfo()
## R version 4.4.0 (2024-04-24)
## Platform: x86_64-pc-linux-gnu
## Running under: Debian GNU/Linux 12 (bookworm)
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.21.so;  LAPACK version 3.11.0
## 
## locale:
##  [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8       
##  [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
##  [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C          
## [10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C   
## 
## time zone: Australia/Sydney
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] cellyx_0.1.0                cowplot_1.1.1              
##  [3] openxlsx_4.2.5.2            BiocNeighbors_1.18.0       
##  [5] BiocSingular_1.16.0         BiocParallel_1.34.2        
##  [7] ggrepel_0.9.3               ggalluvial_0.12.5          
##  [9] Rtsne_0.16                  rcartocolor_2.1.1          
## [11] ggridges_0.5.4              scran_1.28.2               
## [13] scater_1.28.0               scuttle_1.10.2             
## [15] scattermore_1.2             UpSetR_1.4.0               
## [17] RColorBrewer_1.1-3          gridExtra_2.3              
## [19] reshape2_1.4.4              pheatmap_1.0.12            
## [21] moon_0.1.0                  ggpubr_0.6.0               
## [23] ggthemes_4.2.4              ggplot2_3.4.3              
## [25] dplyr_1.1.2                 SingleCellExperiment_1.22.0
## [27] SummarizedExperiment_1.30.2 Biobase_2.60.0             
## [29] GenomicRanges_1.52.0        GenomeInfoDb_1.36.1        
## [31] IRanges_2.34.1              S4Vectors_0.38.1           
## [33] BiocGenerics_0.46.0         MatrixGenerics_1.12.3      
## [35] matrixStats_1.0.0          
## 
## loaded via a namespace (and not attached):
##   [1] spatstat.sparse_3.0-2     bitops_1.0-7             
##   [3] httr_1.4.6                docopt_0.7.1             
##   [5] numDeriv_2016.8-1.1       tools_4.4.0              
##   [7] sctransform_0.3.5         backports_1.4.1          
##   [9] utf8_1.2.3                R6_2.5.1                 
##  [11] lazyeval_0.2.2            uwot_0.1.16              
##  [13] withr_2.5.0               sp_2.0-0                 
##  [15] progressr_0.14.0          cli_3.6.1                
##  [17] textshaping_0.3.6         spatstat.explore_3.2-1   
##  [19] labeling_0.4.2            slam_0.1-50              
##  [21] sass_0.4.9                Seurat_4.3.0.1           
##  [23] spatstat.data_3.0-1       pbapply_1.7-2            
##  [25] systemfonts_1.0.4         parallelly_1.36.0        
##  [27] limma_3.56.2              readxl_1.4.3             
##  [29] rstudioapi_0.15.0         generics_0.1.3           
##  [31] ica_1.0-3                 spatstat.random_3.1-5    
##  [33] car_3.1-2                 zip_2.3.0                
##  [35] DoubletFinder_2.0.3       Matrix_1.6-0             
##  [37] ggbeeswarm_0.7.2          fansi_1.0.4              
##  [39] abind_1.4-5               lifecycle_1.0.3          
##  [41] yaml_2.3.7                edgeR_3.42.4             
##  [43] carData_3.0-5             SparseArray_1.4.8        
##  [45] grid_4.4.0                promises_1.2.0.1         
##  [47] dqrng_0.3.0               crayon_1.5.2             
##  [49] miniUI_0.1.1.1            lattice_0.22-6           
##  [51] beachmat_2.16.0           pillar_1.9.0             
##  [53] knitr_1.43                metapod_1.8.0            
##  [55] boot_1.3-30               future.apply_1.11.0      
##  [57] codetools_0.2-20          leiden_0.4.3             
##  [59] glue_1.6.2                data.table_1.14.8        
##  [61] vctrs_0.6.3               png_0.1-8                
##  [63] cellranger_1.1.0          gtable_0.3.3             
##  [65] cachem_1.0.8              xfun_0.39                
##  [67] S4Arrays_1.4.1            mime_0.12                
##  [69] qlcMatrix_0.9.7           survival_3.5-3           
##  [71] statmod_1.5.0             bluster_1.10.0           
##  [73] ellipsis_0.3.2            fitdistrplus_1.1-11      
##  [75] ROCR_1.0-11               nlme_3.1-164             
##  [77] RcppAnnoy_0.0.21          bslib_0.7.0              
##  [79] irlba_2.3.5.1             vipor_0.4.5              
##  [81] KernSmooth_2.23-22        colorspace_2.1-0         
##  [83] tidyselect_1.2.0          compiler_4.4.0           
##  [85] DelayedArray_0.26.7       plotly_4.10.2            
##  [87] scales_1.2.1              lmtest_0.9-40            
##  [89] stringr_1.5.0             digest_0.6.33            
##  [91] goftest_1.2-3             spatstat.utils_3.0-3     
##  [93] minqa_1.2.5               sparsesvd_0.2-2          
##  [95] rmarkdown_2.23            XVector_0.40.0           
##  [97] htmltools_0.5.8.1         pkgconfig_2.0.3          
##  [99] lme4_1.1-34               sparseMatrixStats_1.12.2 
## [101] highr_0.10                fastmap_1.1.1            
## [103] rlang_1.1.1               htmlwidgets_1.6.2        
## [105] shiny_1.7.5               DelayedMatrixStats_1.22.1
## [107] farver_2.1.1              jquerylib_0.1.4          
## [109] zoo_1.8-12                jsonlite_1.8.7           
## [111] mclust_6.0.0              RCurl_1.98-1.12          
## [113] magrittr_2.0.3            GenomeInfoDbData_1.2.10  
## [115] patchwork_1.1.2           munsell_0.5.0            
## [117] Rcpp_1.0.11               viridis_0.6.4            
## [119] reticulate_1.30           stringi_1.7.12           
## [121] zlibbioc_1.46.0           MASS_7.3-60.0.1          
## [123] plyr_1.8.8                parallel_4.4.0           
## [125] listenv_0.9.0             deldir_1.0-9             
## [127] splines_4.4.0             tensor_1.5               
## [129] locfit_1.5-9.8            igraph_1.5.0.1           
## [131] spatstat.geom_3.2-4       ggsignif_0.6.4           
## [133] ScaledMatrix_1.8.1        evaluate_0.21            
## [135] SeuratObject_4.1.3        nloptr_2.0.3             
## [137] httpuv_1.6.11             RANN_2.6.1               
## [139] tidyr_1.3.0               purrr_1.0.1              
## [141] polyclip_1.10-4           future_1.33.0            
## [143] rsvd_1.0.5                broom_1.0.5              
## [145] xtable_1.8-4              rstatix_0.7.2            
## [147] later_1.3.1               viridisLite_0.4.2        
## [149] ragg_1.2.5                tibble_3.2.1             
## [151] lmerTest_3.1-3            beeswarm_0.4.0           
## [153] cluster_2.1.6             globals_0.16.2